from rag import *
import os
import pickle
def load_and_split_files(directory, exts=None, chunk_size=1024):
    """Read matching text files under *directory* and split them into chunks.

    The directory tree is walked recursively. Every file whose name ends with
    one of the accepted extensions is read as UTF-8 text and cut into
    consecutive pieces of at most ``chunk_size`` characters. Each piece is
    suffixed with a note naming the file it came from, expressed as the file
    path with the leading *directory* part stripped.

    Args:
        directory: Root folder to scan (``str`` or ``os.PathLike``).
        exts: Iterable of accepted file-name suffixes, or a single suffix
            string. When ``None``, the module-level ``available_exts`` is used
            (it is not defined in this file; presumably supplied by
            ``from rag import *``).
        chunk_size: Maximum number of characters per chunk.

    Returns:
        A list of chunk strings, in the order the files are visited by
        ``os.walk``. Empty files contribute no chunks.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8.
        ValueError: If ``chunk_size`` is zero.
    """
    if exts is None:
        exts = available_exts
    # A bare string would otherwise be split into single characters by tuple().
    suffixes = (exts,) if isinstance(exts, str) else tuple(exts)

    directory = os.fspath(directory)
    # Slice relative to the *argument*, not a module-level global, so the
    # source note is correct for whichever directory the caller passes in.
    prefix_len = len(directory)

    documents = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            # One endswith() call with a tuple: a file matching several
            # suffixes (e.g. ".md" and "md") is still processed only once.
            if not file.endswith(suffixes):
                continue
            path = os.path.join(root, file)
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            source = path[prefix_len:]
            for start in range(0, len(content), chunk_size):
                segment = content[start:start + chunk_size]
                documents.append(f"{segment} \n this period of content is from {source}")
    return documents

# Replace the path below with the path of your target folder.
# These statements run at module import time: the folder is scanned and the
# resulting chunks are saved as soon as this file is executed or imported.
directory_path = r'D:\autom\Repositories\gsAI\geochemistrypi-main\geochemistrypi-main\docs'
documents = load_and_split_files(directory_path)

# Hand the collected chunks to save_documents. Neither save_documents nor
# documents_file_path is defined in this file; presumably both come from
# `from rag import *` (verify against the rag module).
save_documents(documents, documents_file_path)